/*

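 Program readimssdata.sas reads raw IMSS data and makes one file per
 survey date, compdata<YYYY><M>30.sas7bdat. The month is not zero-padded,
 e.g. compdata1985330.sas7bdat for 30 March 1985.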

 Kumler, Verhoogen, Frias "Enlisting Employees ..." REStat forthcoming

*/

/*
Notes:

*/

%include 'housekeeping.sas';

    
******* define macro to read 1985-2005 data **********;

/*
 Macro generatedata: for every year from firstyr to lastyr, builds
 working.compdata<year><month><day>, the IMSS records that are active
 on the survey date mdy(month, day, year).

 Parameters:
   firstyr, lastyr   first and last survey year (inclusive)
   month, day        survey month and day; used in the date filter and,
                     exactly as passed (no zero padding), in the output
                     data set name

 Steps for each year:
   1. readspells (defined below) reads each of the six raw file
      families, keeps the records whose dateini-datefin interval covers
      the survey date, and sorts them without duplicate keys.
   2. The six sets are match-merged and only the first record of each
      nss, mod, registro group is kept; because dateini is sorted in
      descending order this is the record with the latest start date.
   3. Records whose nss is not a number of at least 1000000000, or
      whose mod is not positive, are dropped.
   4. addsexo merges in the variables of d2.

 The raw file patterns do not depend on the year, so the same files
 are re-read for every survey date.

 Relies on the macro variable tmp and the libref working, which are
 not defined in this file (presumably set in housekeeping.sas).
*/
%macro generatedata(firstyr, lastyr, month, day);

/* keep the loop counter out of the global symbol table */
%local year;

%do year = &firstyr %to &lastyr;

    %readspells(out=temp1,  files=SSci_ciz1*m.txt, textsal=1, year=&year, month=&month, day=&day)
    %readspells(out=temp1h, files=SShc_ciz1*m.txt, textsal=0, year=&year, month=&month, day=&day)
    %readspells(out=temp2,  files=SSci_ciz2*m.txt, textsal=0, year=&year, month=&month, day=&day)
    %readspells(out=temp2h, files=SShc_ciz2*m.txt, textsal=0, year=&year, month=&month, day=&day)
    %readspells(out=temp3,  files=SSci_ciz3*m.txt, textsal=1, year=&year, month=&month, day=&day)
    %readspells(out=temp3h, files=SShc_ciz3*m.txt, textsal=1, year=&year, month=&month, day=&day)

/* Match-merge on the full sort key. When the same key occurs in more
   than one set, the non-key variable tipotr comes from the right-most
   set in the MERGE statement. */
data working.compdata&year.&month.&day. (keep=nss registro sal tipotr mod year compdata);
  merge temp1 temp1h temp2 temp2h temp3 temp3h;
  by nss mod registro descending dateini datefin descending sal;
  if first.registro;
  /* a non-numeric nss converts to missing and fails the next test */
  afil = input(nss, ?? 11.);
  if afil >= 1000000000;
  if (mod > 0);
  year = &year.;
  /* flag used by addsexo to keep only records that came from this file */
  compdata = 1;
  run;

proc datasets nolist;
    delete temp1 temp1h temp2 temp2h temp3 temp3h;
run;
quit;

%addsexo;

%end;

%mend generatedata;


/*
 Macro readspells (helper for generatedata): reads every raw file
 matching <tmp>/<files> into the data set named by out, keeps the
 records active on the survey date, then sorts them by the merge key
 and removes duplicate keys.

 Parameters:
   out       name of the data set to create
   files     file name pattern, appended to the tmp directory
   textsal   1 if the last two fields are tipotrab then sal and must be
             read as text (sal may have slipped into the tipotrab
             field when tipotrab is absent); 0 if they are sal then
             tipotrab and can be read as numbers
   year, month, day   survey date

 dateini and datefin are held as numeric SAS dates (the date text is
 read into separate character variables), so the date comparisons and
 the descending sort on dateini are chronological.

 NOTE(review): firstobs=2 skips the first line read; with a wildcard
 path, confirm that the header line of every matching file, not only
 the first one, is excluded.
*/
%macro readspells(out=, files=, textsal=0, year=, month=, day=);

%local survey;
%let survey = mdy(&month,&day,&year);

data &out (keep=dateini datefin nss registro sal tipotr mod);
  length nss $ 11 dateini datefin 8 initxt fintxt $ 10;
  %if &textsal %then %do;
  length saltext tipotxt $ 10;
  %end;

  infile "&tmp./&files" dlm=" " firstobs=2 expandtabs missover pad;

  input nss $ registro $ mod initxt $ fintxt $
        orimovini orimovfin tpmovini tpmovfin
  %if &textsal %then %do;
        tipotxt $ saltext $;

  /* Normal case: the salary is in the last field. */
  saltext = right(saltext);
  sal = input(saltext, ?? 10.);

  /* If that gives no number, the salary is taken from the tipotrab
     field and tipotrab is treated as missing. As in the original
     program, only the first 9 of the 10 right-aligned characters are
     used, so the last character of the token is dropped.
     NOTE(review): presumably this strips a trailing non-numeric
     character - confirm against the raw files. */
  if sal = . then do;
      saltext = right(tipotxt);
      tipotxt = ' ';
      sal = input(substr(saltext, 1, 9), ?? 9.);
  end;

  tipotr = input(tipotxt, ?? 10.);
  %end;
  %else %do;
        sal tipotrab;

  tipotr = tipotrab;
  %end;

  /* date text is laid out as year in columns 1-4, month in 6-7, day in 9-10 */
  dateini = mdy(input(substr(initxt, 6, 2), 2.),
                input(substr(initxt, 9, 2), 2.),
                input(substr(initxt, 1, 4), 4.));
  datefin = mdy(input(substr(fintxt, 6, 2), 2.),
                input(substr(fintxt, 9, 2), 2.),
                input(substr(fintxt, 1, 4), 4.));

  /* cap datefin at the survey date; MIN ignores a missing datefin, so
     such a record gets the survey date itself */
  datefin = min(datefin, &survey);

  /* keep records that start on or before the survey date and end on or
     after it; a missing dateini passes the first test because missing
     compares below every number */
  if (dateini <= &survey) & (datefin >= &survey);
  run;

proc sort data=&out nodupkey;
    by nss mod registro descending dateini datefin descending sal;
run;

%mend readspells;


/*
 Macro addsexo: merges the variables of d2 (one record per nss) onto
 the survey-date file working.compdata<year><month><day>.

 Takes no parameters. year, month and day are resolved from the macro
 scope of the caller (generatedata), so the data set name is built
 exactly as the caller builds it. d2 must already exist and be sorted
 by nss.

 Only records that carry compdata=1, i.e. records that were already in
 the survey-date file, are kept; an nss with no match in d2 simply gets
 missing values for the d2 variables.
*/
%macro addsexo;

proc sort data=working.compdata&year.&month.&day.;
    by nss;
run;

data working.compdata&year.&month.&day.;
  merge working.compdata&year.&month.&day. d2;
  by nss;
  /* drop nss values that appear only in d2 */
  if compdata=1;
  run;

%mend addsexo;

*************************************************************;
****************** main program *****************************;
*************************************************************;

**************** read ASEGURADOS file that has sexo ************;

/* Read nss (columns 1-11) and sexo (column 12) from the ASEGURADOS
   listing; any sexo code other than 0, 1 or 2 is set to missing. */
data tmp.asegurados (keep=nss sexo);
  length nss $ 11
         sexo 3;

  infile "&tmp./ASEGURADOS_20060905.TXT" expandtabs missover pad;

  input nss  $ 1-11
        sexo   12;

  if not (sexo in (0, 1, 2)) then sexo = .;
  run;


*** make sure there is only one listing per nss;
    
/* Within each nss, order records from the highest sexo code down;
   sexo=0 and missing sexo therefore come last. */
proc sort data=tmp.asegurados;
    by nss descending sexo;
run;

/*
 Build d1 from the sorted listing.
   - The first record of each nss is kept unchanged.
   - A later record with sexo=0 or missing sexo is dropped. Missing
     sexo is treated like sexo=0 here so that it is not counted as a
     conflicting report.
   - A later record whose sexo differs from the previous record of the
     same nss is recoded to sexo=4, which marks an nss reported with
     two different sexes.

 lag_sexo is a working variable; it is left in d1, as before.
*/
data d1;
    set tmp.asegurados;
    by nss descending sexo;

    /* LAG is called on every record so that it always returns the
       sexo value of the immediately preceding record as read */
    lag_sexo = lag(sexo);

    if not first.nss then do;
        if sexo in (0, .) then delete;
        if lag_sexo ~= sexo then sexo = 4;
    end;
run;

* note: keep only sexo=4 for values of nss where it applies;

/* Re-sort so that a recoded sexo=4, when present, is the first record
   of its nss. */
proc sort data=d1;
    by nss descending sexo;
run;

/* d2: one record per nss, the one with the highest sexo code. */
data d2;
    set d1;
    by nss descending sexo;
    if not first.nss then delete;
run;

********* call macros to read 1985-2005 data *************;

/* One call per survey date within the year. The arguments are first
   year, last year, month and day, so each call builds one file per
   year from 1985 to 2005 for the 30th of the given month. d2 must
   exist before these calls because generatedata invokes addsexo. */
%generatedata(1985,2005,3,30);
%generatedata(1985,2005,6,30);
%generatedata(1985,2005,9,30);
%generatedata(1985,2005,12,30);
